import argparse
import torch
import os
import json
from tqdm import tqdm
import shortuuid

from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
from llava.conversation import conv_templates, SeparatorStyle
from llava.model.builder import load_pretrained_model
from llava.utils import disable_torch_init
from llava.mm_utils import tokenizer_image_token, process_images, get_model_name_from_path
from torch.utils.data import Dataset, DataLoader

from PIL import Image
import math
from azfuse import File


def split_list(lst, n):
    """Split a list into at most n (roughly) equal-sized chunks.

    Every chunk except possibly the last holds ``ceil(len(lst) / n)`` items,
    so fewer than ``n`` chunks are returned when ``lst`` is short. An empty
    ``lst`` yields an empty list of chunks.
    """
    if not lst:
        # A zero chunk size would make range() raise ValueError (step of 0).
        return []
    chunk_size = math.ceil(len(lst) / n)  # ceiling division: never zero here
    return [lst[i:i+chunk_size] for i in range(0, len(lst), chunk_size)]


def get_chunk(lst, n, k):
    """Return chunk ``k`` out of the ``n`` chunks ``split_list`` makes of ``lst``.

    ``split_list`` produces fewer than ``n`` chunks when ``lst`` is short.
    The missing trailing chunks are treated as empty, so a worker whose
    ``k`` is valid (``k < n``) gets ``[]`` instead of an ``IndexError``.
    An index at or beyond ``n`` still raises ``IndexError``.
    """
    chunks = split_list(lst, n)
    if len(chunks) <= k < n:
        # Valid chunk index, but the input ran out before reaching it.
        return []
    return chunks[k]


# Custom dataset class
class CustomDataset(Dataset):
    """Dataset yielding one yes/no verification prompt per prediction.

    Each item pairs a prediction (its ``prompt`` and ``text``) with the image
    of the question that shares its ``question_id``. Items are indexed by
    position in ``predictions``.
    """

    def __init__(self, questions, predictions, image_folder, tokenizer, image_processor, model_config):
        """Store the inputs, index questions by id and pre-fetch their images.

        Args:
            questions: records with at least ``question_id`` and ``image``.
            predictions: records with ``question_id``, ``prompt`` and ``text``.
            image_folder: folder joined in front of every ``image`` value.
            tokenizer: tokenizer handed to ``tokenizer_image_token``.
            image_processor: processor handed to ``process_images``.
            model_config: model config; ``mm_use_im_start_end`` is read from it.
        """
        self.questions = questions
        self.image_folder = image_folder
        self.tokenizer = tokenizer
        self.image_processor = image_processor
        self.model_config = model_config
        self.predictions = predictions
        # Index every question, not only as many as there are predictions,
        # so that a lookup by question_id cannot miss a trailing question.
        self.qid2q = {line["question_id"]: line for line in self.questions}
        # De-duplicate paths: several questions may share one image.
        to_prepare = list({os.path.join(self.image_folder, line["image"]) for line in self.questions})
        print(f'Preparing {len(to_prepare)} images')
        File.prepare(to_prepare)

    def __getitem__(self, index):
        """Build the model inputs for the prediction at ``index``.

        Returns:
            A tuple ``(input_ids, image_tensor, image.size)`` where
            ``image.size`` is the PIL size of the RGB-converted image.

        Raises:
            KeyError: if the prediction's ``question_id`` has no question.
        """
        pred_line = self.predictions[index]
        line = self.qid2q[pred_line["question_id"]]
        image_file = line["image"]
        qs = pred_line["prompt"]
        assert "text" in pred_line
        ans = pred_line["text"]
        if not ans.endswith('.'):
            ans = ans + '.'
        qs = "Question: " + qs + "\nAnswer: " + ans + "\nIs the answer to the question correct, yes or no?"
        if self.model_config.mm_use_im_start_end:
            qs = DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN + '\n' + qs
        else:
            qs = DEFAULT_IMAGE_TOKEN + '\n' + qs

        # NOTE: `args` is the module-level namespace created under the
        # `__main__` guard; this method only works when run as a script.
        conv = conv_templates[args.conv_mode].copy()
        conv.append_message(conv.roles[0], qs)
        conv.append_message(conv.roles[1], None)
        prompt = conv.get_prompt()
        image_path = os.path.join(self.image_folder, image_file)
        with File.open(image_path, "rb") as f:
            image = Image.open(f).convert('RGB')
        image_tensor = process_images([image], self.image_processor, self.model_config)[0]

        input_ids = tokenizer_image_token(prompt, self.tokenizer, IMAGE_TOKEN_INDEX, return_tensors='pt')

        return input_ids, image_tensor, image.size

    def __len__(self):
        """Return the number of predictions, which is what ``__getitem__`` indexes."""
        return len(self.predictions)


def collate_fn(batch):
    """Merge ``(input_ids, image_tensor, image_size)`` samples into one batch.

    The token ids and the image tensors are each stacked along a new leading
    dimension; the image sizes are returned as a tuple, one entry per sample.
    """
    ids_per_sample, pixels_per_sample, sizes = zip(*batch)
    return (
        torch.stack(ids_per_sample, dim=0),
        torch.stack(pixels_per_sample, dim=0),
        sizes,
    )


# DataLoader
def create_data_loader(questions, predictions, image_folder, tokenizer, image_processor, model_config, batch_size=1, num_workers=4):
    """Wrap a ``CustomDataset`` in an unshuffled ``DataLoader``.

    Only ``batch_size == 1`` is accepted. Samples are served in dataset order
    and batched with ``collate_fn``.
    """
    assert batch_size == 1, "batch_size must be 1"
    return DataLoader(
        CustomDataset(
            questions,
            predictions,
            image_folder,
            tokenizer,
            image_processor,
            model_config,
        ),
        batch_size=batch_size,
        num_workers=num_workers,
        shuffle=False,
        collate_fn=collate_fn,
    )


def _load_records(path):
    """Parse ``path`` as JSON lines if it ends in ``.jsonl``, else as one JSON document."""
    with File.open(os.path.expanduser(path), "r") as f:
        if path.endswith('.jsonl'):
            return [json.loads(row) for row in f]
        return json.load(f)


def eval_model(args):
    """Ask the model whether each predicted answer is correct and save the result.

    For every prediction in the selected chunk the model is prompted with the
    question, the predicted answer and "Is the answer to the question correct,
    yes or no?". The generated text, per-token confidence statistics and the
    normalised yes/no probabilities of the first generated token are written
    as JSON lines to ``args.answers_file``.

    Args:
        args: namespace providing ``answers_file``, ``overwrite``,
            ``model_path``, ``model_base``, ``question_file``, ``pred_file``,
            ``num_chunks``, ``chunk_idx``, ``conv_mode``, ``image_folder``,
            ``temperature``, ``top_p``, ``num_beams`` and ``max_new_tokens``.
            ``args.conv_mode`` may be rewritten in place for plain models.

    Returns:
        None. Returns early, without loading the model, when the answers file
        already exists and ``args.overwrite`` is not set.
    """
    answers_file = os.path.expanduser(args.answers_file)
    if File.isfile(answers_file):
        if not args.overwrite:
            print(f"Answers file {answers_file} already exists, skipping.")
            return
        print(f"Overwriting {answers_file}")

    # Model
    disable_torch_init()
    model_path = os.path.expanduser(args.model_path)
    model_name = get_model_name_from_path(model_path)
    tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, args.model_base, model_name)

    questions = get_chunk(_load_records(args.question_file), args.num_chunks, args.chunk_idx)
    predictions = get_chunk(_load_records(args.pred_file), args.num_chunks, args.chunk_idx)

    # os.makedirs("") raises, so skip it when the answers file is a bare filename.
    answers_dir = os.path.dirname(answers_file)
    if answers_dir:
        os.makedirs(answers_dir, exist_ok=True)

    ans_outputs = []
    if 'plain' in model_name and 'finetune' not in model_name.lower() and 'mmtag' not in args.conv_mode:
        args.conv_mode = args.conv_mode + '_mmtag'
        print(f'It seems that this is a plain model, but it is not using a mmtag prompt, auto switching to {args.conv_mode}.')

    data_loader = create_data_loader(questions, predictions, args.image_folder, tokenizer, image_processor, model.config)
    use_cache = model.config.use_cache
    print("use_cache:", use_cache)
    gen_kwargs = {}
    if use_cache:
        gen_kwargs.update({"use_cache": use_cache})

    # Loop invariants: special-token ids and the ids of the four answer words.
    special_tokens = [tokenizer.bos_token_id, tokenizer.pad_token_id, tokenizer.eos_token_id]
    yes_tok_idx = tokenizer(["Yes"]).input_ids[0]
    no_tok_idx = tokenizer(["No"]).input_ids[0]
    lower_yes_tok_idx = tokenizer(["yes"]).input_ids[0]
    lower_no_tok_idx = tokenizer(["no"]).input_ids[0]
    if isinstance(yes_tok_idx, list):
        encoded = (yes_tok_idx, no_tok_idx, lower_yes_tok_idx, lower_no_tok_idx)
        # Index 1 presumably skips a leading BOS id -- TODO confirm. Decide the
        # position once for all four words so they are never mixed.
        pos = 1 if all(len(ids) > 1 for ids in encoded) else 0
        yes_tok_idx, no_tok_idx, lower_yes_tok_idx, lower_no_tok_idx = (ids[pos] for ids in encoded)
        if pos == 0:
            # this is quick fix for 34b model, need to double check
            print(f"yes_tok_idx: {yes_tok_idx}, no_tok_idx: {no_tok_idx}, lower_no_tok_idx: {lower_no_tok_idx}, lower_yes_tok_idx: {lower_yes_tok_idx}")

    for (input_ids, image_tensor, image_sizes), line in tqdm(zip(data_loader, predictions), total=len(predictions)):
        qid = line["question_id"]
        qs = line["prompt"]
        assert "text" in line
        ans = line["text"]
        if not ans.endswith('.'):
            ans = ans + '.'
        cur_prompt = "Question: " + qs + "\nAnswer: " + ans + "\nIs the answer to the question correct, yes or no?"

        input_ids = input_ids.to(device='cuda', non_blocking=True)

        with torch.inference_mode():
            out = model.generate(
                input_ids,
                images=image_tensor.to(dtype=torch.float16, device='cuda', non_blocking=True),
                image_sizes=image_sizes,
                do_sample=True if args.temperature > 0 else False,
                temperature=args.temperature,
                top_p=args.top_p,
                num_beams=args.num_beams,
                max_new_tokens=args.max_new_tokens,
                return_dict_in_generate=True,
                output_scores=True,
                **gen_kwargs)

        sequences = out.sequences
        scores = out.scores

        generated_text = tokenizer.batch_decode(sequences, skip_special_tokens=True)[0].strip()

        # Per-step confidence: the highest probability in each step's
        # distribution, skipping steps whose token is a special token.
        seq_probs, seq_log_probs = [], []
        for si, el in enumerate(scores):
            try:
                if sequences[0][si+1] in special_tokens:
                    continue
            except IndexError:
                # No token at si + 1: fall back to the token at si.
                if sequences[0][si] in special_tokens:
                    continue

            step_probs = torch.nn.functional.softmax(el, dim=1)
            max_prob = torch.max(step_probs)
            max_log_prob = torch.log(max_prob)
            seq_probs.append(max_prob.item())
            seq_log_probs.append(max_log_prob.item())

        if len(seq_probs) > 0:
            avg_prob = sum(seq_probs) / len(seq_probs)
            avg_log_prob = sum(seq_log_probs) / len(seq_log_probs)
            # answer prob is the product of all elements in seq_probs
            answer_prob = 1.0
            for p in seq_probs:
                answer_prob *= p
        else:
            # Nothing was scored: define answer_prob too, so it is neither
            # unbound nor carried over from the previous sample.
            avg_prob, avg_log_prob, answer_prob = 0.0, 0.0, 0.0
        ans_w_prob = {"text": generated_text, "average_prob": avg_prob, "average_log_prob": avg_log_prob, "text_prob": answer_prob}
        lower_text = generated_text.lower()
        if lower_text not in ["yes", "no"]:
            print(f"Generated text is not yes or no: {lower_text}")

        # Yes/no probabilities come from the first generated step only.
        token_probs = torch.nn.functional.softmax(scores[0], dim=1)
        lower_yes_prob = token_probs[:, lower_yes_tok_idx].item()
        lower_no_prob = token_probs[:, lower_no_tok_idx].item()

        yes_prob = token_probs[:, yes_tok_idx].item()
        no_prob = token_probs[:, no_tok_idx].item()

        # Take the likelier casing of each word, then renormalise over the pair.
        yes_prob = max(yes_prob, lower_yes_prob)
        no_prob = max(no_prob, lower_no_prob)

        total_prob = yes_prob + no_prob
        if total_prob > 0:
            scaled_yes_prob = yes_prob / total_prob
            scaled_no_prob = no_prob / total_prob
        else:
            # Both probabilities underflowed to zero: report an uninformative
            # split instead of dividing by zero.
            scaled_yes_prob = scaled_no_prob = 0.5
        ans_w_prob["yes_prob"] = scaled_yes_prob
        ans_w_prob["no_prob"] = scaled_no_prob

        # serialize ans_w_prob, convert Tensor to float
        for k, v in ans_w_prob.items():
            if isinstance(v, torch.Tensor):
                ans_w_prob[k] = float(v)
        ans_id = shortuuid.uuid()
        ans_item = {
            "question_id": qid,
            "question": qs,
            "prompt": cur_prompt,
            "answer_id": ans_id,
            "model_id": model_name,
            "metadata": {}}
        ans_item.update(ans_w_prob)
        ans_outputs.append(ans_item)

    with File.open(answers_file, "w") as ans_file:
        for d in ans_outputs:
            ans_file.write(json.dumps(d) + "\n")


if __name__ == "__main__":
    # Command-line options, registered in the order they appear in --help.
    cli_options = (
        ("--model-path", {"type": str, "default": "facebook/opt-350m"}),
        ("--model-base", {"type": str, "default": None}),
        ("--image-folder", {"type": str, "default": ""}),
        ("--question-file", {"type": str, "default": "tables/question.jsonl"}),
        ("--pred-file", {"type": str, "default": "tables/question.jsonl"}),
        ("--answers-file", {"type": str, "default": "answer.jsonl"}),
        ("--conv-mode", {"type": str, "default": "llava_v1"}),
        ("--num-chunks", {"type": int, "default": 1}),
        ("--chunk-idx", {"type": int, "default": 0}),
        ("--temperature", {"type": float, "default": 0}),
        ("--top_p", {"type": float, "default": None}),
        ("--num_beams", {"type": int, "default": 1}),
        ("--max_new_tokens", {"type": int, "default": 1}),
        ("--overwrite", {"action": 'store_true'}),
    )
    parser = argparse.ArgumentParser()
    for flag, settings in cli_options:
        parser.add_argument(flag, **settings)
    # `args` must stay a module-level name: CustomDataset.__getitem__ reads
    # args.conv_mode from the module namespace.
    args = parser.parse_args()

    eval_model(args)
